// Generate random weights.
// Get the optimal policy for those weights.

// Get a new optimal policy and add it to the set of policies.
// -- Repeat n times. --
// Random local search over the weight vector w.
//
// Starting from a random weight vector with unit 1-norm, each iteration
// perturbs one randomly chosen weight by +/-del, renormalizes, builds the
// policy for the perturbed weights, and keeps the perturbed weights when
// evalr() of the new policy exceeds evalr() of the current policy in at
// least one component.
//
// Expects from the enclosing workspace: ws (number of weights) and the
// functions genpls2, atos2 and evalr.
// Produces: w (final weights), pls1 (policy for the final weights) and
// wc (ws x (nsmax+1) history, column k = weights in effect after k-1 steps).
del=.1;                 // perturbation step applied to a single weight
nsmax=100;              // number of perturbation steps to attempt
w=rand(ws,1);
w=w/norm(w,1);          // scale so the absolute values sum to 1
wc=zeros(ws,nsmax+1);   // one extra column for the initial weights
wc(:,1)=w;
pls1=atos2(genpls2(w,1));
for ns=1:nsmax
    wa=w;
    r1=floor(rand()*ws)+1;      // random index in 1..ws
    r2=floor(rand()*2)*2-1;     // random sign: -1 or +1
    r2=r2*del;
    // NOTE(review): a weight smaller than del can go negative here; the
    // 1-norm rescaling below does not prevent that. Confirm that negative
    // weights are acceptable to genpls2.
    wa(r1)=wa(r1)+r2;
    wa=wa/norm(wa,1);
    pls2=atos2(genpls2(wa,1));
    // Accept when any component of evalr(pls2) is strictly larger.
    if sum(evalr(pls1)<evalr(pls2))
    //if evalr(pls1)*w<evalr(pls2)*wa
        w=wa;
        // Keep the baseline policy in step with the accepted weights;
        // otherwise every later candidate is compared against the policy
        // of the initial weights instead of the current ones.
        pls1=pls2;
    end
    wc(:,ns+1)=w;
    printf("%d\n",ns);          // progress output
end
